##############################################################
##############################################################
### Replication Materials for
### Stefan Müller and Liam Kneafsey:
### Evidence for the Irrelevance of Irrelevant Events
### Political Science Research and Methods
###
### Please get in touch with the authors if you have any questions: 
### stefan.mueller@ucd.ie
### Note: 0000_readme.pdf contains 
### instructions and details about each script and dataset
##############################################################
##############################################################

## load packages
library(dplyr)   # CRAN v1.0.5
library(tidyr)   # CRAN v1.1.3
library(ggplot2) # CRAN v3.3.3
library(Hmisc)   # CRAN v4.4-2
library(scales)  # CRAN v1.1.1
library(here)    # CRAN v1.0.1


## here::here() returns the project root path; it does not change the
## working directory, so the relative paths used in this script are
## resolved against the current working directory
here::here()
## alternatively, set the working directory manually
## setwd("...")


## import code for custom ggplot2 theme
source(file = "function_theme_base.R")

## ISSP data
## The raw dataset with all variables (ZA4850_v2-0-0.dta) can be
## downloaded at https://doi.org/10.4232/1.10079 after a free registration.
## The full dataset can be imported with
##   dat_issp <- foreign::read.dta("ZA4850_v2-0-0.dta")
##
## Here, we use a subsetted dataset containing only the variables that are
## relevant for the descriptive plots:
## V5  = country
## V44 = sport a respondent watches most frequently
## V45 = sport a respondent watches second most frequently
## V46 = degree of pride when the country performs well in sports
##       on the international stage

## load the subsetted ISSP data
dat_issp_raw <- readRDS(file = "data_issp_2009_sports.rds")


## Prepare the respondent-level data:
## - split V5 at "-" into the country abbreviation and the country name
##   (NOTE(review): with the default settings tidyr::separate() warns and
##   drops any piece after the second one, so a V5 label containing more
##   than one "-" would be truncated -- check the warnings when running)
## - relabel "Great Britain (UK)" as "United Kingdom"
## - very_proud_num: 1 if V46 is "I am very proud", 0 for any other answer
## - proud_very_proud_num: 1 if V46 is "I am very proud" or
##   "I am somewhat proud", 0 for any other answer
## Missing V46 answers stay NA in both indicators.
##
## The relabelling uses ifelse() rather than car::recode(): the car package
## is not among the packages loaded at the top of this script, so the
## original call failed on machines where car was not installed.

dat_issp <- dat_issp_raw %>% 
    separate(V5, into = c("county_abbr", "country"), sep = "-") %>% 
    mutate(country = ifelse(country == "Great Britain (UK)",
                            "United Kingdom",
                            country)) %>% 
    mutate(very_proud_num = ifelse(V46 == "I am very proud", 
                                   1, 0)) %>% 
    mutate(proud_very_proud_num = ifelse(V46 == "I am very proud" | 
                                             V46 == "I am somewhat proud", 
                                         1, 0))


## Bootstrapped mean and confidence interval of a numeric vector, returned
## as a one-row data frame with the columns Mean, Lower and Upper
boot_mean_ci <- function(x, level) {
    data.frame(rbind(Hmisc::smean.cl.boot(x, conf.int = level)))
}

## share of respondents per country who are proud/very proud,
## with bootstrapped 95% confidence intervals
set.seed(134)
dat_issp_sum_proud_very_95 <- dat_issp %>% 
    group_by(country) %>% 
    do(boot_mean_ci(.$proud_very_proud_num, level = .95))

## same bootstrap with the same seed, but 90% confidence intervals;
## keep only the interval bounds (renamed) and the country
set.seed(134)
dat_issp_sum_proud_very_90_ci <- dat_issp %>% 
    group_by(country) %>% 
    do(boot_mean_ci(.$proud_very_proud_num, level = .90)) %>% 
    select(Upper_90 = Upper,
           Lower_90 = Lower,
           country)


## combine the 95% and 90% intervals in one data frame (one row per country)
## and flag Ireland so that it can be highlighted in the plot
dat_issp_sum_proud_very <- left_join(dat_issp_sum_proud_very_95,
                                     dat_issp_sum_proud_very_90_ci,
                                     by = "country") %>% 
    mutate(ire_dummy = country == "Ireland")


## Figure A01 ----
## Proportion of respondents per country who are somewhat/very proud,
## with bootstrapped 95% (thin lines) and 90% (thick lines) confidence
## intervals; countries are ordered by their mean and labelled to the left
## of the lower bound of the 95% interval, Ireland (ire_dummy) is coloured
## differently from the other countries
ggplot(data = dat_issp_sum_proud_very,
       mapping = aes(x = reorder(country, Mean),
                     y = Mean,
                     colour = ire_dummy)) +
    geom_point(size = 3) +
    geom_linerange(aes(ymin = Lower, ymax = Upper), size = 0.5) +
    geom_linerange(aes(ymin = Lower_90, ymax = Upper_90), size = 1.3) +
    geom_text(aes(y = Lower, label = country),
              hjust = 1, nudge_y = -0.01) +
    scale_colour_manual(values = c("grey70", "black")) +
    scale_y_continuous(limits = c(0.6, 1),
                       labels = scales::percent_format(accuracy = 1)) +
    coord_flip() +
    labs(x = NULL,
         y = "Respondents who are somewhat proud/very proud\nwhen country does well at an international sports competition") +
    ## country names are drawn by geom_text(), so legend, axis ticks and
    ## axis labels on the country axis are removed
    theme(legend.position = "none",
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank())
## ggsave() without a plot argument stores the last plot that was created
ggsave(filename = "fig_a01.pdf", width = 9, height = 6)


## Now check how often the selected sport of a country is named as the
## most watched (V44) or second most watched (V45) sport by a respondent

## Returns 1 if `watched` is the selected sport of the respondent's country
## and 0 in all other cases (other countries, other sports, missing answers)
flag_selected_sport <- function(country, watched) {
    case_when(
        country == "Ireland" & watched == "Other team sports" ~ 1,
        country == "United States" & watched == "American football" ~ 1,
        country == "Germany" & watched == "Football, soccer" ~ 1,
        country == "United Kingdom" & watched == "Football, soccer" ~ 1,
        TRUE ~ 0
    )
}

dat_issp_checksports <- dat_issp %>% 
    mutate(
        ## dummy: selected sport is the most watched sport (V44)
        sport_relevant_first = flag_selected_sport(country, V44),
        ## dummy: selected sport is the second most watched sport (V45)
        sport_relevant_second = flag_selected_sport(country, V45),
        ## dummy: selected sport is the most or second most watched sport
        sport_relevant_1_2 = pmax(sport_relevant_first,
                                  sport_relevant_second),
        ## label of the selected sport for each of the four countries
        ## (NA for all other countries)
        sport = case_when(
            country == "Germany" ~ "Football/Soccer",
            country == "United Kingdom" ~ "Football/Soccer",
            country == "United States" ~ "American Football",
            country == "Ireland" ~ "Hurling and Gaelic Football"
        )
    )


## keep only the four countries shown in the plot
dat_issp_firstsecond_select <- filter(
    dat_issp_checksports,
    country %in% c("Germany", "United States", 
                   "United Kingdom",
                   "Ireland")
)

## proportions and bootstrapped 95/90% confidence intervals
## for each country and its selected sport

## 95% intervals, plus a two-line label "country (sport)" for the plot
set.seed(134)
dat_issp_firstsecond_95 <- dat_issp_firstsecond_select %>% 
    group_by(country, sport) %>% 
    do(data.frame(rbind(
        Hmisc::smean.cl.boot(.[["sport_relevant_1_2"]], conf.int = .95)
    ))) %>% 
    mutate(country_sport = paste0(country, "\n", "(", sport, ")"))

## 90% intervals from the same seed; keep only the label (used as the
## join key) and the renamed interval bounds
set.seed(134)
dat_issp_firstsecond_90 <- dat_issp_firstsecond_select %>% 
    group_by(country, sport) %>% 
    do(data.frame(rbind(
        Hmisc::smean.cl.boot(.[["sport_relevant_1_2"]], conf.int = .90)
    ))) %>% 
    mutate(country_sport = paste0(country, "\n", "(", sport, ")")) %>% 
    ungroup() %>% 
    transmute(country_sport, Lower_90 = Lower, Upper_90 = Upper)


## combine both sets of intervals for the plot
dat_issp_firstsecond <- dat_issp_firstsecond_95 %>% 
    left_join(dat_issp_firstsecond_90, by = "country_sport")

## Figure A02 ----
## Proportion (displayed as percentages) of respondents who name the
## selected sport as their most watched or second most watched sport,
## with 95% (thin lines) and 90% (thick lines) bootstrapped intervals
ggplot(data = dat_issp_firstsecond, 
       mapping = aes(x = country_sport, y = Mean,
                     ymin = Lower, ymax = Upper)) +
    geom_point(size = 3) +
    ## thin line: 95% interval, taken from the plot-level ymin/ymax mapping
    geom_linerange(size = 0.5) +
    ## thick line: 90% interval
    geom_linerange(aes(ymin = Lower_90, ymax = Upper_90), size = 1.3) +
    scale_y_continuous(limits = c(0, 0.5),
                       labels = scales::percent_format(accuracy = 1)) +
    coord_flip() +
    labs(x = NULL, y = "Most frequently watched sports")
ggsave(filename = "fig_a02.pdf", width = 9, height = 3)


## record the date and time at which the script was executed successfully,
## followed by the details of the R session for reproducibility
date()

sessionInfo()
